# Set up sample for cost effectiveness calculations


pacman::p_load(data.table, tidyverse, sf, cowplot, kableExtra, modelsummary, scales, fst, tinytable, pbmcapply)

# Clear every object from the global environment so the script starts from a
# clean workspace. This removes objects only; attached packages stay loaded.
rm(list = ls())
# Run the project-wide globals script. Presumably this defines WORKING, the
# directory used by the read/write calls below -- TODO confirm.
source("code/globals.R")

# Load full statewide sample as a data.table (to = NULL reads through the last row)
samplehomes <- read_fst(file.path(WORKING, "statewide-sample.fst"), as.data.table = TRUE, to = NULL)

# Tract-level mean wildfire hazard, ignoring homes with a missing hazard value.
# Added by reference so every home carries its tract's average.
samplehomes[, tract_average_wildfire_hazard := mean(wildfire_hazard, na.rm = TRUE), by = tract]

# Drop any tracts with average home WF risk < 0.1% (0.001 on the hazard scale).
# Tracts where every home lacks a hazard value have a NaN average and are
# dropped too, because filter() discards rows whose condition evaluates to NA.
samplehomes <- samplehomes %>% filter(tract_average_wildfire_hazard >= 0.001)

# Reduces sample to 1.1m homes and 52 counties
# Removes Imperial, Inyo, Kings, San Francisco, Sutter, and Yolo because none of their tracts has an average home WF risk >= 0.1%

# Street indicator: one integer ID per distinct combination of
# zip code, street name, and street suffix.
# Note: group_by() treats NA as its own group value, so homes in the same zip
# with a missing street name/suffix share a single streetID.
samplehomes <- samplehomes %>%
  group_by(across(all_of(c("PropertyZip", "PropertyStreetName", "PropertyStreetSuffix")))) %>%
  mutate(streetID = cur_group_id()) %>%
  ungroup()

# Add HAZUS rebuilding costs ----
# Cost table keyed by CensusBlock; joined onto the homes below.
rebuildcosts <- read_fst(file.path(WORKING, "hazuscosts.fst"), as.data.table = TRUE)

## Append block-specific Hazus costs to data file
# The home's block identifier is converted to numeric to form the join key.
# NOTE(review): left_join() will duplicate homes if rebuildcosts holds more than
# one row per CensusBlock -- presumably the table is unique by block; TODO confirm.
samplehomes <- samplehomes %>%
  mutate(CensusBlock = as.numeric(block)) %>%
  left_join(rebuildcosts, by = "CensusBlock")

# Compute losses per square foot ----
## Note that the distribution of sq feet contains many small homes, but lots of these are presumably old. If evaluating a standard for NEW homes, should use something larger. Perhaps median square footage of post-XX year homes in that county or zip
samplehomes <- samplehomes %>%
  # Per-square-foot losses: building + contents + temporary housing + disruption.
  mutate(
    persqftlosses = BldgCostPerSqFt + ContentsPerSqFt +
      TempHousingPerSqFtPer24Months + DisruptionPerSqFt
  ) %>%
  # Total losses per home: per-square-foot losses scaled by home size, plus the
  # garage cost. Derived from persqftlosses so the formula lives in one place;
  # .before keeps hazuslosses ahead of persqftlosses in the saved column order.
  mutate(hazuslosses = sqfeet * persqftlosses + GarageCostTotal, .before = persqftlosses)

# Clean data to only include homes with complete information -------

# Keep only homes where every key variable below is non-missing.
# Author's notes on the effect of this step:
#   - Reduces sample again to 1.07m and 50 counties
#   - Drops Del Norte and Mendocino counties, since they don't provide year
#     built or square foot information
samplehomes <- samplehomes %>%
  drop_na(
    combinedyear, regime, sqfeet, neighbors_in_30m_centroid,
    persqftlosses, hazuslosses, wildfire_hazard
  )


# Wildfire hazard rescaled by 100 so it reads as a percentage
samplehomes[["wildfire_hazard_pct"]] <- samplehomes[["wildfire_hazard"]] * 100

# Write the finished sample to disk for downstream scripts (currently
# 63-compute-cost-effectiveness-and-policy-counterfactuals.R and 70-compare-samples.R)
write_fst(
  x = samplehomes,
  path = file.path(WORKING, "statewide-costeff-sample.fst")
)
